read data

# Load the NHIS extract and pass it through janitor::clean_names(); the rest of
# the analysis refers to columns by lower-case names such as `year` and `worrx`.
nhis <- janitor::clean_names(read_csv("data/nhis_data01.csv"))
## Rows: 468212 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): NHISHID, NHISPID, HHX, FMX, PX
## dbl (28): YEAR, SERIAL, STRATA, PSU, HHWEIGHT, PERNUM, PERWEIGHT, SAMPWEIGHT...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Worries and anxiety

Number of people who reported taking medication for worried, nervous, or anxious feelings from 2015 to 2021.

# Bar chart: for each survey year from 2015 on, the percentage of respondents
# with a yes/no answer in `worrx` who answered "yes". The hover text shows the
# underlying counts ("yes out of total").
nhis %>%
  filter(
    year >= 2015,
    # Keep only the two codes that are recoded below; every other code is
    # excluded from both numerator and denominator.
    worrx %in% c(1, 2)
  ) %>%
  mutate(
    worrx = recode_factor(worrx, `1` = "no", `2` = "yes")
  ) %>%
  # count() returns an ungrouped tibble, so no leftover `year` grouping leaks
  # into the reshaping and plotting steps.
  count(year, worrx, name = "wor_num") %>%
  pivot_wider(
    names_from = worrx,
    values_from = wor_num,
    # A year with no rows for one answer gets 0 instead of NA, so the
    # percentage and label below stay defined.
    values_fill = 0L
  ) %>%
  mutate(
    wor_percentage = yes / (no + yes) * 100,
    text_label = str_c(yes, " out of ", no + yes)
  ) %>%
  plot_ly(
    y = ~wor_percentage,
    x = ~year,
    color = ~year,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Percentage of people who reported taking medication for worried, nervous, or anxious feelings each year",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage")
  ) %>%
  # `year` is mapped to colour only to tint the bars; the colour bar would just
  # repeat the x axis.
  hide_colorbar()

Frequency of worries

# Stacked bar chart: for each survey year from 2015 on, the percentage of
# respondents in each worry-frequency category. The hover text shows
# "category count out of yearly total".
nhis %>%
  filter(
    year >= 2015,
    # NOTE(review): the filter admits codes 1-6 but only 1-5 are labelled
    # below; presumably code 6 never occurs -- confirm against the codebook.
    worfreq > 0, worfreq < 7
  ) %>%
  mutate(
    worfreq = recode_factor(
      worfreq,
      `1` = "Daily",
      `2` = "Weekly",
      `3` = "Monthly",
      `4` = "A few times a year",
      `5` = "Never"
    )
  ) %>%
  count(year, worfreq, name = "count") %>%
  # Per-year totals and shares are per-row values, so use a grouped mutate();
  # returning several rows per group from summarize() is deprecated since
  # dplyr 1.1.0.
  group_by(year) %>%
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  ungroup() %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfreq,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of frequency of worries",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )

Worry level distribution

# Stacked bar chart: for each survey year from 2015 on, the percentage of
# respondents in each worry-level category. The hover text shows
# "category count out of yearly total".
nhis %>%
  filter(
    year >= 2015,
    # NOTE(review): the filter admits codes 1-6 but only 1-3 are labelled
    # below; presumably codes 4-6 never occur -- confirm against the codebook.
    worfeelevl > 0, worfeelevl < 7
  ) %>%
  mutate(
    # recode_factor() orders levels by the order of the replacements, so
    # listing 1, 3, 2 puts the middle category between "A lot" and "A little".
    worfeelevl = recode_factor(
      worfeelevl,
      `1` = "A lot",
      `3` = "Somewhere between a little and a lot",
      `2` = "A little"
    )
  ) %>%
  count(year, worfeelevl, name = "count") %>%
  # Per-year totals and shares are per-row values, so use a grouped mutate();
  # returning several rows per group from summarize() is deprecated since
  # dplyr 1.1.0.
  group_by(year) %>%
  mutate(
    sum_count = sum(count),
    percentage = 100 * count / sum_count,
    text_label = str_c(count, " out of ", sum_count)
  ) %>%
  ungroup() %>%
  plot_ly(
    y = ~percentage,
    x = ~year,
    color = ~worfeelevl,
    type = "bar",
    colors = "viridis",
    text = ~text_label
  ) %>%
  layout(
    title = "Distribution of worry levels",
    xaxis = list(title = ""),
    yaxis = list(title = "Percentage"),
    barmode = "stack",
    legend = list(orientation = "h")
  )